clear
gen x = .
tempfile empty
save empty, replace
set type double, permanently


// globals only relevant for this do file---Global switches to turn on/off sections of the do file
	global Data_GIS_section0_5 						"1" // utility name masterlist for the shapefiles. correspondence with EIAID and OpenEI based data.
	global Data_GIS_section2_spatial_RD 			"1"  // matching CBG pairs by distance across border.



if $Data_GIS_section0_5 == 1 {
		set more off
		
		use "${Data_Clean}/Electricity_rate_data_panel_collapsed", clear

		drop utility rate_name source mostrecent_label // variables dropped to make data light
		compress
		
		tempfile electricity_panel
		save `electricity_panel', replace
		
		// masterlist crosswalk of different utility names and the EIAID (From OpenEI data base, which we use to get most of rates data)
		import excel "${ElectricityRates}/utility_names_eiaid_masterlist.xlsx", clear firstrow
		drop utility // utility name in the openEI data.
		tempfile utility_name_masterlist
		save `utility_name_masterlist', replace
		
		import excel cbg=A cbg_name=B mean_income=C population=E census_bg_area=G rowid=H utility_name=J using ///
			"${Boundaries}/overlap_boundaries_proper_new.xlsx", clear cellrange(A2)
		destring cbg, replace force
		duplicates tag cbg, gen(overlap)
		drop if overlap !=0
		drop overlap
		replace census_bg_area = census_bg_area/(1609^2)

		// create correspondence of various utility names and eiaids
		merge m:1 utility_name using `utility_name_masterlist'
		tab _merge
		keep if _merge == 3
		drop _merge
		tempfile utility_bg_masterlist 
		save `utility_bg_masterlist', replace

		// Import Census Tracts to Zip Codes map and save tempfile
		import delimited "${Boundaries}/join_ZCTA_bg.csv", clear varnames(1)
		keep geoidx namex geoidy namey
		destring geoidy, force replace
		gen colid = _n
		ren geoidx cbg
		ren geoidy zipcode_primary
		ren namex cbg_name 
		ren namey zipcode_primary_name
		
		tempfile ZipMap
		save `ZipMap', replace
		
		// PGE service regions and zip codes
		import excel "${Boundaries}/RESZIPS.XLS", clear cellrange(A2:E1220) firstrow
		*tostring ZIPCODE, format(%12.0f) replace
		ren ZIPCODE zipcode_primary
		
		gsort zipcode_primary -Percentage
		by zipcode_primary: keep if _n == 1
		ren BaselineTerritory region
		keep zipcode_primary region
		isid zipcode_primary
		tempfile zip_region_PGE
		save `zip_region_PGE', replace
		
		// merge census-tract zip to zip region PGE
		use `ZipMap', clear
		merge m:1 zipcode_primary using `zip_region_PGE', keep(1 3) // include zips/ZCTA for other merges
		drop _merge
		isid cbg
		drop cbg_name zipcode_primary_name
		tempfile bg_PGETerritory
		save `bg_PGETerritory', replace
		
		// gas data
		use "${Restricted_Data}/OPIS/zip_month_avg_retail_price.dta", clear
		keep if year <= 2017 // at least for now, thats how far Experian data runs.
		ren mailingzip zipcode_primary
		tempfile GasData
		save `GasData', replace
		
		// CBG to climate zone crosswalk
		import delimited "${Boundaries}/CBG_climate_zone.csv", clear varnames(1)
		tempfile CBG_climate_zone
		save `CBG_climate_zone', replace
		
		// CBG level DMV data from 2013
		import delimited "${Restricted_Data}/DMV/CBG_dmv_2013.csv", clear varnames(1)
		drop year
		la var fuel_economy_mean "CBG 2013: mean fuel economy"
		la var standard_count "CBG 2013: standard vehicle count"
		la var luxury_count "CBG 2013: luxury vehicle count"
		la var total_car_count "CBG 2013: total car count"
		la var percent_luxury "CBG 2013: percent luxury"
		la var hybrid_count "CBG 2013: hybrid count"
		la var prius_count "CBG 2013: Prius count"
		tempfile CBG_dmv_2013
		save `CBG_dmv_2013', replace
		
		// MUD data at CBG level
		import delimited "${ACS}/CBG_MUDs.csv", clear varnames(1)
		ren total total_hh
		tempfile CBG_MUDs
		save `CBG_MUDs', replace
		
		import delimited "${ACS}/med_income_bg_2013.csv", clear varnames(1)
		keep geoid 
		ren geoid cbg 
		gen colid = _n
		tempfile colid_to_cbg
		save colid_to_cbg, replace
}


if $Data_GIS_section2_spatial_RD == 1 {

	
	set more off
	*use "${Data_Clean}/cbg_boundary_matched.dta", clear
	use "${Boundaries}/cbg_boundary_muni_matched_named.dta", clear
	drop ref_name
	drop adj_name
	drop id
	drop sa_id_adj
	ren sa_id_ref groupid // groupid --- utility pair id
	ren ref rowid
	ren adj rowid_adj
	ren adj_cbg cbg_adj
	ren ref_cbg cbg
	ren distance_to_adj distance_util_adj
	ren distance_to_ref distance_util
	ren distance distance_btw_cbg
	foreach var in cbg cbg_adj rowid rowid_adj groupid {
		destring `var', replace
	} 
	// some obs are missing matches, so drop them
	drop if mi(cbg) == 1| mi(cbg_adj) == 1
	
	// now merge other cbg-level variables
	preserve
		use `utility_bg_masterlist', clear
		gen pop_density = population/census_bg_area
		la var pop_density "pop density (ppl/sq mile)"
		la var cbg "census block group (ACS)"
		order cbg 
		tempfile collevel
		save `collevel', replace
		foreach var of varlist cbg-pop_density {
			ren `var' `var'_adj
		}
		tempfile collevel_adj
		save `collevel_adj', replace
	restore

	merge m:1 cbg using `collevel', nogen keep(1 3)
	merge m:1 cbg_adj using `collevel_adj', nogen keep(1 3)
	

	summ groupid
	levelsof groupid, local(groupid)
	foreach i of local groupid {
		preserve
			keep if groupid == `i'
			foreach varbit in rowid distance_util cbg ///
				mean_income population census_bg_area pop_density ///
				utility_name eiaid {
				ren `varbit'_adj `varbit'1
				ren `varbit' `varbit'0
			}
			gen group_pair_id = _n
			reshape long rowid distance_util cbg ///
				mean_income population census_bg_area pop_density ///
				utility_name eiaid, i(distance_btw_cbg groupid group_pair_id) j(adjacent)
			gen years = 4
			gen months = 12
			expand years
			gen year = 2013
			bys groupid group_pair_id cbg: gen n = _n
			replace year = year + n
			
			expand months
			bys groupid group_pair_id cbg year: gen month = _n 
			assert month <=12
			drop years months
			isid groupid group_pair_id cbg year month 
			
			merge m:1 cbg year month using "${Data_Clean}/Experian_at_bg_level.dta", keep(1 3)
			
			
			merge m:1 eiaid year month using `electricity_panel', keep(1 3) nogen
		
			
			merge m:1 cbg using `bg_PGETerritory', keep(1 3) nogen
		
			replace region = "" if eiaid != 14328
			
			foreach var of varlist Tier?UsageAmt MaxTierUsageAmt pastlastupdate	{
				foreach letter in P R S T V X {
					replace `var' = `var'_`letter' if region == "`letter'" & eiaid == 14328 
				}
			}	
			drop AvgRateTier?_? Tier?UsageAmt_? MaxTierUsageAmt_? HighestTierAvgRate_? pastlastupdate_?
			la var region "utility region (PGE)"
			

			compress
			drop n _merge
			save "${Data_Clean}/matched_data_placebo_group`i'.dta", replace 
			
		restore
	}
	use empty, clear
	foreach j of local groupid {
		append using "${Data_Clean}/matched_data_placebo_group`j'.dta"
		erase "${Data_Clean}/matched_data_placebo_group`j'.dta"
	}
	// merge climate zones data, and use it to identify max usage for SDGE (and SCE, when we get data)
	merge m:1 cbg using `CBG_climate_zone', keep(1 3) nogen
	forvalues i = 0/3 {
		replace Tier`i'UsageAmt =  Tier`i'UsageAmt_Coastal if utility == "San Diego Gas & Electric" & (climate_zone == "6" | climate_zone == "8")
		replace Tier`i'UsageAmt =  Tier`i'UsageAmt_Inland if utility == "San Diego Gas & Electric" & climate_zone == "10"
		replace Tier`i'UsageAmt =  Tier`i'UsageAmt_Mountain if utility == "San Diego Gas & Electric" & climate_zone == "14"
		replace Tier`i'UsageAmt =  Tier`i'UsageAmt_Desert if utility == "San Diego Gas & Electric" & climate_zone == "15"
	}
	drop Tier?UsageAmt_Coastal Tier?UsageAmt_Inland Tier?UsageAmt_Mountain Tier?UsageAmt_Desert
	
	// now merge gas price data and MUDs
	merge m:1 zipcode_primary year month using `GasData', keep(1 3) nogen
	merge m:1 cbg using `CBG_dmv_2013', keep(1 3) nogen
	merge m:1 cbg using `CBG_MUDs', keep(1 3) nogen
	drop x // this was in the default empty dataset. nothing in it so drop it.
	la var groupid "Adjacent Utility pair ID"
	la var group_pair_id "Within utility-pair, CBG-pair ID"
	la var cbg "Census Block Group ID"
	la var adjacent "1 if observation is in adjacent utility"
	la var distance_btw_cbg "distance between matched CBGs"
	la var distance_util "distance to the utility border with the adjacent, paired utility"
	la var rowid "utility ID (row order) in the utility shapefile"
	la var mean_income "mean household income"
	la var population "population"
	la var census_bg_area "CBG area (sq. mile)"
	la var pop_density "population/sq. mile"
	la var eiaid "EIA's utility number"
	la var year "Year"
	la var month "Month"
	la var ref_count "Number of CBGs from the reference utility"
	la var adj_count "Number of CBGs from the adjacent utility for a given ref utility"
	la var HighestTierAvgRate "Weekday avearge top tier rate"
	la var zipcode_primary "Zipcode with which CBG has greatest overlap in area"
	forvalues i = 0/4 {
		la var Tier`i'UsageAmt "KWh/month used to get to tier `i'"
	}
	la var MaxTierUsageAmt "KWh/month used to get to the top tier"

	la var manually_collected "Utility rates manually collected"
	order groupid group_pair_id adjacent cbg year month distance_btw_cbg distance_util 
	save "${Data_Clean}/EVSales_Panel_Data_distance_matched_CBG_placebo.dta", replace
}

